***
** MERGE SER ;

libname unzipped "";
libname zipped "";
libname interm "";

	%MACRO unzipify;
		data _null_;
		   infile inzip lrecl=256 recfm=F length=length eof=eof unbuf;
		   file   ds lrecl=256 recfm=N;
		   input;
		   put _infile_ $varying256. length;
		   return;
		 eof: stop;
		run;
	%MEND;


	** PULL CPI DEFLATOR (2017 $);
	%MACRO PullCPI;
		proc import out = cpi_csv datafile = "CPIAUCSL.csv"  dbms=csv replace; run;
		proc sql noprint;
			CREATE TABLE cpi_temp AS SELECT *, year(DATE) AS year FROM cpi_csv;
			CREATE TABLE cpi AS SELECT year, mean(CPIAUCSL)/245.134 AS cpi FROM cpi_temp WHERE year >= 1951 AND year <= 2014 GROUP BY year;
		proc datasets lib = work; delete cpi_csv cpi_temp; run;
	%MEND;




****
** MERGE KEY SER ELEMENTS IN MBR/ PHUS DATA;

%MACRO add_ser(SEG);

	%PUT ++ Merging SER Segment &SEG.;

	** Unzip raw SER;
	%LET zipstr = ;
	%LET memstr = sum&SEG.sas.sas7bdat;
	filename inzip ZIP "&zipstr" member="&memstr";
	filename ds "%sysfunc(getoption(work))/sum&SEG.sas.sas7bdat" ;
	%unzipify;


	** Pull key variables from SER;
	data ser; set sum&SEG.sas (keep = ssn dob_: dod_: sex race sum_earns_1-sum_earns_64);
		ser_dob=mdy(dob_mo,1,cats(dob_cc,dob_yr));
		ser_dod=mdy(dod_mo,1,cats(dod_cc,dod_yr));
		format ser_dob date9.;
		format ser_dod date9.;
		drop dob_: dod_:;
		rename sex = ser_sex;
		rename race = ser_race;
	run;

				* Keep key cohorts from SER ;
				data ser; set ser; where year(ser_dob) >= 1911 AND year(ser_dob) <= 1950; run;


	** Save demographics from SER;
	data demogr; set ser (keep = ssn ser_dob ser_dod ser_sex ser_race); run;

	** Make an earnings panel from SER;
	proc transpose data = ser  out = ser; by ssn ser_dob ser_dod ser_sex ser_race; run;
	proc sql noprint; 
		CREATE TABLE temp AS SELECT *, year - year(ser_dob) AS age 
		FROM (SELECT SSN AS ssn, ser_dob, ser_dod, COL1 AS nom_sern, input(substr(_NAME_,11,2), 4.) + 1950 AS year FROM ser)
		ORDER BY year;
	data temp; set temp; where age >= 30 & age <= 99; run;
	

	** Convert to real 2017 $;
	%PullCPI;
	data ser; merge temp cpi; by year; run;
	data ser; set ser; sern = nom_sern/cpi; run;


	** Calculate retirement age at the person level (3 versions);
	proc sort data = ser; by ssn year; run;
	data temp; set ser; 
		if ssn = lag(ssn) AND age = lag(age) + 1 AND sern < 1000 AND lag(sern) >= 1000 AND age <= 72 THEN retire1 = age - 1; 
		if ssn = lag(ssn) AND age = lag(age) + 1 AND sern < 1000 AND lag(sern) >= 1000 AND age <= 100 THEN retire2 = age - 1; 
		if ssn = lag(ssn) AND age = lag(age) + 1 AND sern < lag(sern)*0.5 AND age <= 72 THEN retire3 = age - 1; 
	run;	
	proc sql noprint;
		CREATE TABLE retire AS SELECT ssn, max(retire1) AS retire1, max(retire2) AS retire2, max(retire3) AS retire3
		FROM temp GROUP BY ssn;


	** Take person-level average earnings/ years worked for key age ranges;
	proc sql noprint;
		CREATE TABLE temp AS SELECT *,
		(CASE WHEN age >= 30 AND age <= 60 THEN sern ELSE . END) AS sern3060,
		(CASE WHEN age >= 50 AND age <= 60 THEN sern ELSE . END) AS sern5060,
		(CASE WHEN age >= 30 AND age <= 60 AND sern >= 1000 THEN 1 ELSE 0 END) AS workyrs3060
		FROM ser;

		CREATE TABLE perm AS SELECT ssn, mean(sern3060) AS sern3060, mean(sern5060) AS sern5060, sum(workyrs3060) AS workyrs3060
		FROM temp GROUP BY ssn;


	** Set later earnings wide at the person level;
	data temp; set ser (keep = ssn age sern); where age >= 55 AND age <= 71; run;
	proc transpose data = temp out = wide prefix = sern; 
		by ssn; id age; var sern;
	run;
	data wide; set wide (drop = _NAME_); run;


	** Merge these data sets together;
	data mbrphus; set interm.mbrphus&SEG.; ssn = can; run;
	data joined; merge demogr perm retire wide mbrphus; by ssn; run;

		** Assess match quality of demographics;
		data test; set joined (keep = ssn ser_sex sexprimary ser_race race ser_dod bdod ser_dob dob); 
			dmatch = (year(ser_dod) = year(bdod) AND month(ser_dod) = month(bdod));
			if ser_dod = . AND bdod = . THEN dmatch = -9; if ser_dod = . AND bdod ~=. then dmatch = -2; if ser_dod ~=. AND bdod = . THEN dmatch = -1; 
			bmatch = (year(ser_dob) = year(dob) AND month(ser_dob) = month(dob));
			if ser_dob = . AND dob = . THEN bmatch = -9; if ser_dob = . AND dob ~=. then bmatch = -2; if ser_dob ~=. AND dob = . THEN bmatch = -1; 
		proc freq data = test; title "demographic match quality"; table ser_sex*SEXPRIMARY / missing norow nocol; table ser_race*RACE / missing norow nocol; 
		proc freq data = test; table bmatch / missing; table dmatch / missing; run;

	data interm.sermbrphus&SEG. (compress = yes); set joined; run;		

%MEND;

%add_ser(02); %add_ser(03); %add_ser(05); %add_ser(06); %add_ser(09);
%add_ser(10); %add_ser(15); %add_ser(16); %add_ser(19); %add_ser(20);


